#!/usr/bin/env python
"""ref_test_data.py [options]

Downloads and unpacks test data in the ``ccc-gistemp-test-2009-12-28``
directory.

Options:
   --help            Print this text.
   --check-md5-sums  Checks the MD5 checksum of each input file.
   --gen-md5-sums    Calculates and prints MD5 checksums for each input file.
"""
__docformat__ = "restructuredtext"


import getopt
import os
import sys
import tarfile
try:
    from hashlib import md5
except ImportError:
    from md5 import md5 # For older versions of Python

from . import fetch


#: Name of the sub-directory where the 'official' test data lives. Every
#: path listed in ``expect_md5`` starts with this directory name.
test_data_dir = "ccc-gistemp-test-2009-12-28"

#: File name of the 'official' gzip-compressed tarball of the test data.
#: It is looked for (and unpacked) relative to the current directory.
test_tar_file = "ccc-gistemp-test-2009-12-28.tar.gz"

#: The expected MD5 checksum (hex digest) of the compressed tarball; the
#: tarball is only unpacked when its actual checksum matches this value.
expect_tar_checksum = "c2cf417730306986045e62b37c1887e9"


#: Checksums used to verify that the 'official' data has not
#: been inadvertently corrupted.
#:
#: Maps each test file's path (opened exactly as written, i.e. relative
#: to the current directory) to the MD5 hex digest it is expected to have.
#:
#: This can be (re)generated by executing ``ref_test_data.py`` with
#: the option ``--gen-md5-sums``.
expect_md5 = {
    'ccc-gistemp-test-2009-12-28/input/9641C_200907_F52.avg':
        '9cf933947f5a8509e6f1ce79eca6b1bc',
    'ccc-gistemp-test-2009-12-28/input/SBBX.HadR2':
        '1a739f9b79b8a4e9791c58e9aae2e201',
    'ccc-gistemp-test-2009-12-28/input/antarc1.list':
        '69130046300915d462214585a83a0fa6',
    'ccc-gistemp-test-2009-12-28/input/antarc1.txt':
        '064f5c6f78b544a9492d996d6706312e',
    'ccc-gistemp-test-2009-12-28/input/antarc2.list':
        '869def19ddabfddfb638f113715561d5',
    'ccc-gistemp-test-2009-12-28/input/antarc2.txt':
        '659d9fbdac76830d101dfff6a624dcff',
    'ccc-gistemp-test-2009-12-28/input/antarc3.list':
        'de60a7d6c593af1bd3d734b21950a8bf',
    'ccc-gistemp-test-2009-12-28/input/antarc3.txt':
        '53aaf4980f4917d6188e6c37dfb2c437',
    'ccc-gistemp-test-2009-12-28/input/mcdw.tbl':
        '2cb7012e2b39996026b0337587e71d40',
    'ccc-gistemp-test-2009-12-28/input/sumofday.tbl':
        '9fc8acf7fb64fabf01c12e06ab20974b',
    'ccc-gistemp-test-2009-12-28/input/t_hohenpeissenberg_200306.txt_as_received_July17_2003':
        '3a3ce4e705fc2cad593007237022a6ea',
    'ccc-gistemp-test-2009-12-28/input/ushcn2.tbl':
        'dcddf4ab907fcc12f8e60dc08fe60546',
    'ccc-gistemp-test-2009-12-28/input/ushcnV2_cmb.tbl':
        'ad035139edc72aa0bc65b86046c5a8aa',
    'ccc-gistemp-test-2009-12-28/input/v2.inv':
        '34d5a53e46c7e7e59e1c23459774a283',
    'ccc-gistemp-test-2009-12-28/input/v2.mean':
        '30d81e02249459b050ddfd61aeb2e39e',
    'ccc-gistemp-test-2009-12-28/result/BX.Ts.ho2.GHCN.CL.PA.1200':
        '2a1a68602e38b9e16aa7f6ed34c39ce0',
    'ccc-gistemp-test-2009-12-28/result/GLB.Ts.ho2.GHCN.CL.PA.txt':
        '92c8c7e6b5850d7f9baf55e8f42cf831',
    'ccc-gistemp-test-2009-12-28/result/NH.Ts.ho2.GHCN.CL.PA.txt':
        '4b921372615ff3ffc7723783134a499a',
    'ccc-gistemp-test-2009-12-28/result/SH.Ts.ho2.GHCN.CL.PA.txt':
        'e2e81f617bf7b1827022e4e166d281ae',
    'ccc-gistemp-test-2009-12-28/result/ZonAnn.Ts.ho2.GHCN.CL.PA.txt':
        'b6c29a2ca32a2c14cfe8c69187c93e90',
}


def calc_file_md5_sum(path):
    """Return the MD5 hex digest of the file at `path`.

    The file is read in binary mode, 4096 bytes at a time. If the file
    cannot be opened (``IOError``), ``None`` is returned instead of a
    digest. Errors raised while reading are not caught.
    """
    try:
        stream = open(path, "rb")
    except IOError:
        return None

    digest = md5()
    with stream:
        # read() yields an empty bytes object at end of file, which is the
        # sentinel that stops the iteration.
        for chunk in iter(lambda: stream.read(4096), b""):
            digest.update(chunk)
    return digest.hexdigest()


def get_test_data_checksums():
    """Compute the actual MD5 checksum of every file named in ``expect_md5``.

    Returns a dictionary keyed by the same paths as ``expect_md5``. Each
    value is whatever `calc_file_md5_sum` returns for that path: the hex
    digest, or ``None`` when the file could not be opened.
    """
    return dict((name, calc_file_md5_sum(name)) for name in expect_md5)


def gen_test_data_checksums():
    """Print the current checksums as a Python ``expect_md5`` dictionary.

    The text written to standard output is laid out like the ``expect_md5``
    definition in this file (one path per line, its checksum on the next),
    with the entries sorted by path.
    """
    actual = get_test_data_checksums()
    entries = ["    %r:\n        %r,\n" % (name, actual[name])
               for name in sorted(actual)]
    sys.stdout.write("expect_md5 = {\n" + "".join(entries) + "}\n")


def compare_md5_sums(checksums=None, expected=None):
    """Compare actual checksums of the test files against the expected ones.

    :Param checksums:
        Optional mapping of path to actual MD5 hex digest, with ``None``
        (or a missing key) meaning the file could not be read. Defaults to
        the result of `get_test_data_checksums`, which covers the files
        named in the module-level ``expect_md5``.
    :Param expected:
        Optional mapping of path to expected MD5 hex digest. Defaults to
        the module-level ``expect_md5``.

    :Return:
        A tuple ``(missing, corrupted, msg)``. ``missing`` counts the files
        with no checksum available and ``corrupted`` those whose checksum
        differs from the expected one. ``msg`` is a report with one line
        per problem file (in sorted path order) followed by a summary line;
        it is the empty string when every file is fine.
    """
    if expected is None:
        expected = expect_md5
    if checksums is None:
        checksums = get_test_data_checksums()

    missing = 0
    corrupted = 0
    lines = []
    for name in sorted(expected):
        md5_sum = checksums.get(name)
        if md5_sum is None:
            lines.append("Missing: %s...\n" % name)
            missing += 1
        elif md5_sum != expected[name]:
            lines.append("Corrupt: %s...\n" % name)
            # The original incremented an undefined name ('corrupt') here,
            # which raised an error on the first corrupt file.
            corrupted += 1

    if missing or corrupted:
        lines.append(
            "Some of the test files are missing, corrupt or unreadable\n")

    return missing, corrupted, "".join(lines)


def check_test_files_are_ok():
    """Verify the test files and report any problems on standard error.

    Returns 0 when `compare_md5_sums` produces no report, otherwise writes
    the report to standard error and returns 1.
    """
    report = compare_md5_sums()[2]
    if not report:
        return 0
    sys.stderr.write(report)
    return 1


def unpack_test_tar():
    """Unpack the test data tarball into the current directory, if usable.

    The tarball (``test_tar_file``) is only unpacked when it exists and its
    MD5 checksum equals ``expect_tar_checksum``.

    :Return:
        A tuple ``(status, message)``:

        - ``(None, "")`` when the tarball does not exist;
        - ``(None, message)`` when the tarball has the wrong checksum;
        - ``(1, message)`` when the tarball could not be opened;
        - ``(0, "")`` when every member was extracted.

        Errors raised while extracting members are not caught.
    """
    if not os.path.exists(test_tar_file):
        return None, ""
    if calc_file_md5_sum(test_tar_file) != expect_tar_checksum:
        return None, "File %r has wrong checksum" % test_tar_file

    try:
        archive = tarfile.open(test_tar_file, "r:gz")
    except (IOError, tarfile.TarError):
        # tarfile reports unreadable/invalid archives with TarError
        # subclasses, which are not IOErrors.
        return 1, "Could not read %r" % test_tar_file

    # try/finally rather than 'with': TarFile objects are not context
    # managers on the older Python versions this file still caters for.
    try:
        for info in archive:
            archive.extract(info)
    finally:
        archive.close()

    return 0, ""


def download_and_unpack():
    """Unpack the test data tarball, downloading it first if required.

    An existing tarball with the right checksum is simply unpacked. When
    there is no usable tarball, a download is attempted via ``fetch.fetch``
    and the unpacking is tried once more.

    Returns 0 on success; on failure the message from `unpack_test_tar` is
    written to standard error and 1 is returned.
    """
    status, message = unpack_test_tar()

    if status is not None:
        # A tarball with the right checksum was found; only a positive
        # status signals that unpacking it failed.
        if status > 0:
            sys.stderr.write(message)
            return 1
        return 0

    # There is no tarfile or it is broken, so try to download it and then
    # try unpacking again.
    if message:
        sys.stdout.write(message)
    sys.stdout.write("Trying to download %r..." % test_tar_file)
    fetch.fetch(["ccc-gistemp-test-2009-12-28.tar"], ".")

    status, message = unpack_test_tar()
    if status != 0:
        sys.stderr.write(message)
        return 1
    return 0


def install_and_check_test_files():
    """Install and check the 'official' test files.

    The unpacked set of 'official' test files is verified by comparing the
    MD5 checksum of each file with its expected value.

    If any file is missing or corrupt, the problem report is written to
    standard output and the function attempts to (re)unpack the test files
    tarball, downloading it first if necessary. The files are then verified
    a second time.

    Returns 0 when the files are OK (initially or after re-installing) and
    1 otherwise, with the reason written to standard error.
    """
    missing, corrupted, report = compare_md5_sums()
    if missing or corrupted:
        sys.stdout.write(report)
        if download_and_unpack() != 0:
            sys.stderr.write("Could not get test files installed")
            return 1

        # Verify again now that the files have been (re)installed.
        missing, corrupted, report = compare_md5_sums()
        if missing or corrupted:
            sys.stderr.write(report)
            return 1

    return 0


class Fatal(Exception):
    """Error that aborts the command line run with a message for the user.

    Raised inside `main` for command line problems; `main` catches it,
    writes ``msg`` to standard error and returns exit status 2.
    """

    def __init__(self, msg):
        # The text that `main` writes to standard error.
        self.msg = msg

def main(args=None):
    """Run the command line tool.

    :Param args:
        The full argument vector, program name first (like ``sys.argv``).
        Defaults to ``sys.argv`` when omitted or ``None``.

    :Return:
        The process exit status: 0 on success (or after ``--help``), 1 when
        the test files could not be verified or installed, and 2 for a
        command line error.
    """
    if args is None:
        args = sys.argv
    try:
        gen_md5_sums = False
        check_md5_sums = False
        try:
            opts, _ = getopt.getopt(
                args[1:], 'hgc',
                ['help', 'gen-md5-sums', 'check-md5-sums'])
        except getopt.GetoptError as msg:
            raise Fatal(str(msg))

        for o, a in opts:
            if o in ('-h', '--help'):
                print(__doc__)
                return 0
            elif o in ('-g', '--gen-md5-sums'):
                gen_md5_sums = True
            elif o in ('-c', '--check-md5-sums'):
                check_md5_sums = True
            else:
                raise Fatal("Unsupported option: %s" % o)

        if gen_md5_sums:
            gen_test_data_checksums()
            return 0
        # Propagate the status of the check/install step so that a failure
        # is visible in the exit code instead of always reporting success.
        if check_md5_sums:
            return check_test_files_are_ok()
        return install_and_check_test_files()
    except Fatal as err:
        sys.stderr.write(err.msg)
        sys.stderr.write('\n')
        return 2

# Script entry point: pass the command line explicitly (main() takes the
# full argument vector) and use its return value as the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
